1. Integrate data: MB14, MB15, MB17 + allen(glia)

cms_scz <- lapply(setNames(c("MB14", "MB15", "MB17"), c("MB14", "MB15", "MB17")), function(smplname){
             cm <- Seurat::Read10X_h5(
                paste0("/home/mbatiuk/projects/human_schizo/MB6-MB23/counts_Allen_Genome/MB14_MB15_MB17_proper_gtf/",
                       smplname, "/outs/filtered_feature_bc_matrix.h5"))
             colnames(cm) <- paste0(smplname, "_", colnames(cm))
             cm})
annotations_scz2 <- read_rds("/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/ctx_layers_SCZ_ALLEN/rds_objects/annotations_scz2.rds")
cms_scz <- lapply(cms_scz, function(smpl) {smpl[, colnames(smpl) %in% names(annotations_scz2$high.clean)]}) 

Filter Allen dataset, retain non-neural cells

#load Allen annotation

annotation_allen <- fread("/d0/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/Allen_31.01.2020/sample_annotations.csv")

#schizo annotations and subtype orders
annotations_scz2 <- readRDS("/d0/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/ctx_layers_SCZ_ALLEN/rds_objects/annotations_scz2.rds")

hsub_order <- read_rds("/d0/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/ctx_layers_SCZ_ALLEN/rds_objects/hsub_order.rds")
msub_order <- read_rds("/d0/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/ctx_layers_SCZ_ALLEN/rds_objects/msub_order.rds")


#load Allen annotation
annotation_allen <- fread(file = "/d0/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/Allen_31.01.2020/sample_annotations.csv")

#Allen 2019 human data, smart-seq4 
#https://transcriptomic-viewer-downloads.s3-us-west-2.amazonaws.com/human/transcriptome.zip


allen_exon <- read_tome_dgCMatrix(tome = "/d0/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/Allen_31.01.2020/transcrip.tome", 
                                  target = "data/t_exon")
allen_intron <- read_tome_dgCMatrix(tome = "/d0/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/Allen_31.01.2020/transcrip.tome", 
                                    target = "data/t_intron")


#load sample and gene names
allen_smpl_names <- read_tome_sample_names(tome = "/d0/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/Allen_31.01.2020/transcrip.tome")
allen_gene_names <- read_tome_gene_names(tome = "/d0/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/Allen_31.01.2020/transcrip.tome")

#sum up introns + exons:
allen_cm <- allen_exon + allen_intron

#add colnames and rownames to matrix
rownames(x = allen_cm) <- allen_gene_names
colnames(x = allen_cm) <- allen_smpl_names
allen_list_1 <- lapply(annotation_allen$external_donor_name_label %>% unique(), 
                     function(x) 
                       allen_cm[ ,annotation_allen$external_donor_name_label == x &
                                  annotation_allen$region_label == "CgG" &
                                   annotation_allen$class_label == "Non-neuronal"]) %>% 
  setNames(paste0("ACC_", annotation_allen$external_donor_name_label %>% unique()))


allen_list_2 <- lapply(annotation_allen$external_donor_name_label %>% unique(), 
                     function(x) 
                       allen_cm[ ,annotation_allen$external_donor_name_label == x &
                                  annotation_allen$region_label == "MTG"  &
                                   annotation_allen$class_label == "Non-neuronal"]) %>% 
  setNames(paste0("MTG_", annotation_allen$external_donor_name_label %>% unique()))


allen_list_3 <- lapply(annotation_allen$external_donor_name_label %>% unique(), 
                     function(x) 
                       allen_cm[ ,annotation_allen$external_donor_name_label == x &
                                  (annotation_allen$region_label == "M1ul" | annotation_allen$region_label == "M1lm") &
                                   annotation_allen$class_label == "Non-neuronal"]) %>% 
  setNames(paste0("M1_", annotation_allen$external_donor_name_label %>% unique()))


allen_list_4 <- lapply(annotation_allen$external_donor_name_label %>% unique(), 
                     function(x) 
                       allen_cm[ ,annotation_allen$external_donor_name_label == x &
                                  annotation_allen$region_label == "V1C"  &
                                   annotation_allen$class_label == "Non-neuronal"]) %>% 
  setNames(paste0("V1C_", annotation_allen$external_donor_name_label %>% unique()))




allen_list_5 <- lapply(annotation_allen$external_donor_name_label %>% unique(), 
                     function(x) 
                       allen_cm[ ,annotation_allen$external_donor_name_label == x &
                                  (annotation_allen$region_label == "S1ul" | annotation_allen$region_label == "S1lm") &
                                   annotation_allen$class_label == "Non-neuronal"]) %>% 
  setNames(paste0("S1_", annotation_allen$external_donor_name_label %>% unique()))


allen_list_6 <- lapply(annotation_allen$external_donor_name_label %>% unique(), 
                     function(x) 
                       allen_cm[ ,annotation_allen$external_donor_name_label == x &
                                  annotation_allen$region_label == "A1C"  &
                                   annotation_allen$class_label == "Non-neuronal"]) %>% 
  setNames(paste0("A1C_", annotation_allen$external_donor_name_label %>% unique()))

allen_list <- c(allen_list_1, allen_list_2, allen_list_3, allen_list_4, allen_list_5, allen_list_6)

Make Seurat Visium object

#merge allen and scz countmatrics lists
scz_all_cms <- append(cms_scz, allen_list)


#create Seurat objects
seu_scz_all <- lapply(scz_all_cms, CreateSeuratObject)


#run SCTransform on all memebers
seu_scz_all <- lapply(seu_scz_all, SCTransform) 


#select features for downstream integration
scz_all_features <- SelectIntegrationFeatures(object.list = seu_scz_all, nfeatures = 3000)

#run PrepSCTIntegration
seu_scz_all <- PrepSCTIntegration(object.list = seu_scz_all, anchor.features = scz_all_features)


#find anchors
scz_all_anchors <- FindIntegrationAnchors(object.list = seu_scz_all, normalization.method = "SCT", 
                                          anchor.features = scz_all_features,
                                          k.filter = 130 #needed to decrease, error when integr small and big datasets
                                          )


#integrate schizo and allen data
scz_all_anchors <- readRDS("/d0/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/schizo_visium/rds_objects/scz_all_anchors.rds")
scz_all_integrated <- IntegrateData(anchorset = scz_all_anchors, normalization.method = "SCT")


#Run PCA
scz_all_integrated <- RunPCA(scz_all_integrated)

#Run UMAP
scz_all_integrated <- RunUMAP(scz_all_integrated, dims = 1:30)
saveRDS(scz_all_integrated, "scz_all_integrated.RDS")

load data

cm_glia <- read_rds("/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/schizo_visium/rds_objects/allen_10x_m1_non_neur.rds")
annot_glia <- read_rds("/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/schizo_visium/rds_objects/anno_allen_10x.rds")
so <- readRDS("scz_all_integrated.RDS")
annotation <- read_rds("/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/schizo_visium/rds_objects/annotation_scz_allen.rds")

so$subtypes_med <- annotation$med[names(Idents(so))]
so$subtypes_high <- annotation$high[names(Idents(so))]
so$origin <- setNames(
  gsub("^[^(MB)].*", "Allen", names(annotation$high)) %>% gsub("^MB.*", "Scz", .),
  names(annotation$high))[names(Idents(so))]

so_subs <- subset(so, (origin == "Scz") & (subtypes_high != "Glia"))

#embeddingPlot(so_subs@reductions$umap@cell.embeddings, groups=so$subtypes_high, size=0.1, font.size=c(2,3))
visium_allen_old <- read_rds("/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/schizo_visium/rds_objects/visium_allen.rds")

Estimate variable genes for stereoscope deconvolotion

var_genes_3k <- visium_allen_old$MB11 %>% FindVariableFeatures(assay="Spatial", nfeatures=3000) %>% 
  .@assays %>% .$Spatial %>% .@var.features
var_genes_1k <- visium_allen_old$MB11 %>% FindVariableFeatures(assay="Spatial", nfeatures=1000) %>% 
  .@assays %>% .$Spatial %>% .@var.features

Save the data for stereoscope

asChn <- function(x) setNames(as.character(x), names(x))
cm_merged <- so_subs@assays$RNA@counts %>% list(cm_glia) %>% 
  conos:::mergeCountMatrices(transposed=F)
annotation_merged <- asChn(so_subs$subtypes_med) %>% c(asChn(annot_glia$med))
annotation_merged_high <- asChn(so_subs$subtypes_high) %>% c(asChn(annot_glia$high))
p_vals <- colSums(cm_merged > 0) %>% split(annotation_merged[names(.)]) %>% 
  sapply(mean)
ggplot(tibble(y=p_vals, n=names(p_vals))) +
  geom_bar(aes(x=n, y=y), stat="identity") +
  theme(axis.text=element_text(angle=45, hjust=1))
cm_merged[var_genes_3k,] %>% Matrix::t() %>% as.matrix() %>% 
  as.data.frame() %>% data.table::fwrite("/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/cm_with_glia2.tsv", sep="\t", row.names=T)
cm_norm <- Pagoda2$new(cm_merged)$counts
cm_norm[, var_genes_3k] %>% as.matrix() %>% as.data.frame() %>% 
  data.table::fwrite("/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/cm_with_glia_norm2.tsv", sep="\t", row.names=T)
tr <- names(visium_allen) %>% 
    pblapply(function(n) {
      cm <- visium_allen[[n]]@assays$Spatial@counts
      Matrix::t(cm[var_genes_3k,]) %>% as.matrix() %>% as.data.frame() %>% 
        data.table::fwrite(paste0("/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/", n, "_cm.tsv"), sep="\t", row.names=T)
      cm.norm <- Pagoda2$new(cm, verbose=FALSE)$counts
      cm.norm[, var_genes_3k] %>% as.matrix() %>% as.data.frame() %>% 
        data.table::fwrite(paste0("/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/", n, "_cm_norm.tsv"), sep="\t", row.names=T)
      
      cm.norm[, var_genes_1k] %>% as.matrix() %>% as.data.frame() %>% 
        data.table::fwrite(paste0("/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/", n, "_cm_norm_1k.tsv"), sep="\t", row.names=T)
})
as_tibble(annotation_merged, rownames="cell") %>% 
  set_colnames(c("cell", "bio_celltype")) %>% 
  write_delim("/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/annotation_med2.tsv", delim="\t")
as_tibble(annotation_merged_high, rownames="cell") %>% 
  set_colnames(c("cell", "bio_celltype")) %>% 
  write_delim("/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/annotation_med_high.tsv", delim="\t")

STEREOSCOPE

taskset --cpu-list 0-14 stereoscope run --sc_cnt cm_with_glia2.tsv --sc_labels annotation_med_high.tsv -sce 20000  -o deconv -n 5000 --st_cnt MB*cm.tsv -ste 30000 -stb 1000 -scb 1000

2. Read results from stereoscope and plot visium panel on MB11

library(pagoda2)
library(magrittr)
library(tidyverse)
library(pbapply)
library(dataorganizer)
library(Seurat)
library(scrattch.io)

Code below is from revision, therefore some chunks might contatin just functions which load new data (since cohort 1 was created in the exactly same way)

#find the ensemble count folder of spatial data cohort 2
dataPath <- function(...) 
  file.path("/d0/home/mbatiuk/projects/human_schizo/primary/spatial/counts/ENSEMBLE_97_genome_cohort2/", ...)
annotationPath <- function(...)
  file.path("/d0/home/kdragicevic/RProjects/schizo/spatial/layeranno", ...)
kDatasetNames <- c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52") %>% 
  setNames(., .)
#cohort 2:
kConditionPerDataset <- c(MB6="SCZ", MB50="SCZ", MB52="SCZ", 
                          MB9="CNT", MB19="CNT", MB21="CNT")
samples <- kDatasetNames %>% pblapply(function(n) {
 sp <- dataPath(n, "outs/spatial/tissue_positions_list.csv") %>% 
     read_csv(col_names=F) %>% as.data.frame() %>% set_rownames(.$X1) %>% .[,5:6] %>% 
     set_colnames(c("x", "y"))
   cm <- dataPath(n, "outs/filtered_feature_bc_matrix.h5") %>% Seurat::Read10X_h5()
   ann <- paste0(n, "_MB.csv") %>% annotationPath() %>% read_csv() %$% 
     setNames(.[[2]], Barcode) %>% .[. != "WM"]
   cm <- cm[, names(ann)]
   sp <- sp[names(ann),]
   colnames(cm) <- rownames(sp) <- names(ann) <- paste0(n, "_", names(ann))
   p2 <- vpscutils::GetPagoda(cm, n.cores=10, embeding.type="UMAP", verbose=FALSE)
   list(cm=cm, annotation=ann, position=sp, p2=p2)})
 
 write_rds(samples, "/d0/home/kdragicevic/RProjects/schizo/spatial/spatial_samples2.rds")

samples1 (cohort 1) and samples 2 (cohort 2)

#samples_sp1 <-  read_rds("/d0-mendel/home/viktor_petukhov/Copenhagen/Schizophrenia20/cache/spatial_samples.rds")
#samples_sp2 <- read_rds("/d0/home/kdragicevic/RProjects/schizo/spatial/spatial_samples2.rds")

subtype position in tissue plot

visium_allen_plot <- lapply(setNames(c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7"), 
                                c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7")),
                       function(smpl) {
                         Load10X_Spatial(paste0(
                           "/home/mbatiuk/projects/human_schizo/MB6-MB23/primary/spatial/counts/Allen_genome/", smpl, "/outs/"), 
                           slice = smpl)})
names(visium_allen_plot) <- names(visium_allen_plot) %>% gsub("MB18-2", "MB18.2", .)

#Import layer labels with filtered out iffy areas
layers.filter.list <- lapply(
  setNames(c("MB11", "MB12", "MB14", "MB15", "MB18.2", "MB22", "MB23", "MB7"),
  c("MB11", "MB12", "MB14", "MB15", "MB18.2", "MB22", "MB23", "MB7")),
  function(x) {
    df <- read.csv(paste0("/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/schizo_visium/layers_filtered/", x, ".csv"), 
             header = T)
    setNames(df[, 2], df[, 1])})
## add layers
visium_allen_plot <- lapply(setNames(c("MB11", "MB12", "MB14", "MB15", "MB18.2", "MB22", "MB23", "MB7"),
  c("MB11", "MB12", "MB14", "MB15", "MB18.2", "MB22", "MB23", "MB7")),
    function(x) {
   #Also set white matter as exclude   
   visium_allen_plot[[x]]$Layers <- layers.filter.list[[x]][names(Idents(visium_allen_plot[[x]]))] %>% as.vector %>%
     replace_na("Exclude") %>% gsub("^WM$", "Exclude", .) %>% setNames(names(Idents(visium_allen_plot[[x]])))
   visium_allen_plot[[x]]})

## Subset visium seurat objects to remove NAs - no layer annotation, and WM
visium_allen_plot <- lapply(visium_allen_plot, function(x) {subset(x, subset = Layers == "Exclude", invert = T)})
## Add stereoscope predictions
visium_allen_plot <- lapply(setNames(names(visium_allen_plot), names(visium_allen_plot)), function(x) {
  anno.med <- stereo.med
  anno.med$MB18.2 <- anno.med$`MB18-2`
  anno.med$`MB18-2` <- NULL
  anno.high <- stereo.high
  anno.high$MB18.2 <- anno.high$`MB18-2`
  anno.high$`MB18-2` <- NULL
  visium_allen_plot[[x]][["Stereo_MED"]] <- anno.med[[x]][names(Idents(visium_allen_plot[[x]])), ] %>% t %>% CreateAssayObject
  visium_allen_plot[[x]][["Stereo_HIGH"]] <- anno.high[[x]][names(Idents(visium_allen_plot[[x]])), ] %>% t %>% CreateAssayObject
  visium_allen_plot[[x]]})

Plot Medium stereoscope res with seurat

visium_allen_plot <- lapply(visium_allen_plot, function(x) {
  DefaultAssay(x) <- "Stereo_MED"
  x})

lots.seur.stereo.MED <- lapply(setNames(names(visium_allen_plot), names(visium_allen_plot)),
                                      function(x) {
                        SpatialFeaturePlot(visium_allen_plot[[x]], 
                                           features = rownames(visium_allen_plot[[x]][["Stereo_MED"]]), 
                                           pt.size.factor = 1, 
                                           ncol = 2, 
                                           crop = F)})

same for HIGH annotation

visium_allen_plot <- lapply(visium_allen_plot, function(x) {
  DefaultAssay(x) <- "Stereo_HIGH"
  x})

plots.seur.stereo.HIGH <- lapply(setNames(names(visium_allen_plot), names(visium_allen_plot)),
                                      function(x) {
                                        SpatialFeaturePlot(visium_allen_plot[[x]], 
                                                           features = rownames(visium_allen_plot[[x]][["Stereo_HIGH"]]),
                                                           pt.size.factor = 1,
                                                           ncol = 2,
                                                           crop = F)})

Plot layers based on MB11 sample

#set default assay
DefaultAssay(visium_allen_plot$MB11) <- "Stereo_HIGH"
MB11.stereo.plot.list <- 
  append(list(Layers = SpatialPlot(visium_allen_plot$MB11, 
                                   group.by = "Layers", 
                                   pt.size.factor = 1.6, 
                                   crop = T, 
                                   cols = palette_45[1:6],
                                   stroke = 0) + 
  labs(subtitle = "Layers") + theme(plot.subtitle = element_text(size = 11, hjust = 0.5, face = "bold"), 
                            legend.text = element_text(size = 11), legend.position = "bottom", 
                            legend.title = element_blank(),
          plot.margin = unit(c(0, 0, 0, 0), "mm"),
          legend.spacing.x = unit(0, "mm")) +
    guides(fill = guide_legend(override.aes = list(alpha = 1, size = 2), nrow = 1))),
  lapply(setNames(hsub_order_allen_10x %>% gsub("_", "-", .), hsub_order_allen_10x), function(subt) {
                      SpatialPlot(visium_allen_plot[["MB11"]], features = subt, pt.size.factor = 1.6, crop = T, alpha = c(0.5, 4), stroke = 0) +
  labs(subtitle = subt %>% gsub("-", "_", .)) +
                        theme(plot.subtitle = element_text(size = 11, hjust = 0.5, face = "bold"), 
                                legend.text = element_text(size = 11), legend.position = "bottom", 
                                legend.title = element_blank(), legend.key.width = unit(10, "mm"),
                              legend.key.height = unit(3, "mm"),
                              plot.margin = unit(c(0, 0, 0, 0), "mm"))})) %>%
  cowplot::plot_grid(plotlist=., ncol=5)

3. Cell proportions

code is duplicated because of existance of 2 cohorts

layers.filter.list0 <- lapply(
  setNames(c("MB11", "MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7"),
  c("MB11", "MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7")),
  function(x) {
    df <- read.csv(paste0("/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/schizo_visium/layers_filtered/", x, ".csv"), 
             header = T)
    setNames(df[, 2], df[, 1])})

layers.filter.list1 <- lapply(
  setNames(c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52"),
  c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52")),
  function(x) {
    df <- read.csv(paste0("/d0/home/kdragicevic/RProjects/schizo/spatial/layeranno/", x, "_MB.csv"), 
             header = T)
    setNames(df[, 2], df[, 1])})

layers.filter.list2 <- c(layers.filter.list1,layers.filter.list0)

MEDIUM

stereo.med1 <- lapply(setNames(c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7"), 
                                c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7")), function(x) {
                       y <-        read.delim(paste0("/home/viktor_petukhov/Data/Schizophrenia/deconv_v2/stereoscope_res/",
                                                    x, "_cm.tsv"), row.names = 1)
                       #filter low quality spots
                        y[rownames(y) %in% names(layers.filter.list0[[x]]), ]})

stereo.med1 <- lapply(setNames(c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7"), 
                                c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7")), function(x) {
                       y <- stereo.med1[[x]]
                       #change rownames
                       colnames(y) <- colnames(y) %>% gsub("\\.", "_", x = .)
                       y})

stereo.med2 <- lapply(setNames(c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52"), 
                                c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52")), function(x) {
                       y <- read.delim(paste0("/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/deconv2/",
                                                    x, ".tsv"), row.names = 1)
                       #filter low quality spots
                        y[rownames(y) %in% names(layers.filter.list1[[x]]), ]})

stereo.med2 <- lapply(setNames(c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52"), 
                                c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52")), function(x) {
                       y <- stereo.med2[[x]]
                       #change rownames
                       colnames(y) <- colnames(y) %>% gsub("\\.", "_", x = .)
                       y})

stereo.med <- c(stereo.med1, stereo.med2)

HIGH

stereo.high1 <- lapply(setNames(c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7"), 
                                c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7")), function(x) {
                       y <-        read.delim(paste0("/d0/home/viktor_petukhov/Data/Schizophrenia/deconv_v2/stereoscope_res_high/",
                                                    x, "_cm.tsv"), row.names = 1)
                       #filter low quality spots
                         y[rownames(y) %in% names(layers.filter.list0[[x]]), ]})

stereo.high1 <- lapply(setNames(c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7"), 
                                c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7")), function(x) {
                       y <- stereo.high1[[x]]
                       #change rownames
                        colnames(y) <- colnames(y) %>% gsub("\\.", "_", x = .)
                        y})

stereo.high2 <- lapply(setNames(c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52"), 
                                c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52")), function(x) {
                       y <- read.delim(paste0("/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/deconv/",
                                                    x, ".tsv"), row.names = 1)
                       #filter low quality spots
                         y[rownames(y) %in% names(layers.filter.list1[[x]]), ]})

stereo.high2 <- lapply(setNames(c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52"), 
                                c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52")), function(x) {
                       y <- stereo.high2[[x]]
                       #change rownames
                        colnames(y) <- colnames(y) %>% gsub("\\.", "_", x = .)
                        y})

stereo.high <- c(stereo.high1, stereo.high2)

load order

hsub_order <- readRDS("/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/ctx_layers_SCZ_ALLEN/rds_objects/hsub_order.rds")
msub_order <- readRDS("/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/ctx_layers_SCZ_ALLEN/rds_objects/msub_order.rds")
hsub_order_allen_10x <- factor(c(hsub_order[-c(length(hsub_order), length(hsub_order) -1)], 
                            "Astro_L1_FGFR3_SERPINI2", "Astro_L1_6_FGFR3_AQP1", "Astro_L1_6_FGFR3_PLCG1", 
                            "Oligo_L2_6_OPALIN_FTH1P3", "Oligo_L3_6_OPALIN_ENPP6", "Oligo_L2_6_OPALIN_MAP6D1",
                            "Oligo_L5_6_OPALIN_LDLRAP1", "OPC_L1_6_PDGFRA_COL20A1", "Micro_L1_6_TYROBP_CD74", 
                            "Endo_L2_5_NOSTRIN_SRGN", "VLMC_L1_5_PDGFRA_COLEC12"), 
                           
                           levels = c(hsub_order[-c(length(hsub_order), length(hsub_order) -1)], 
                             "Astro_L1_FGFR3_SERPINI2", "Astro_L1_6_FGFR3_AQP1", "Astro_L1_6_FGFR3_PLCG1", 
                            "Oligo_L2_6_OPALIN_FTH1P3", "Oligo_L3_6_OPALIN_ENPP6", "Oligo_L2_6_OPALIN_MAP6D1",
                            "Oligo_L5_6_OPALIN_LDLRAP1", "OPC_L1_6_PDGFRA_COL20A1", "Micro_L1_6_TYROBP_CD74", 
                            "Endo_L2_5_NOSTRIN_SRGN", "VLMC_L1_5_PDGFRA_COLEC12"),
                           ordered = T)
msub_order_allen_10x <- factor(c(msub_order[-c(length(msub_order), length(msub_order) -1)], 
                            "Astro", "Oligo", "OPC", "Micro_PVM", "Endo", "VLMC"), 
                           levels = c(msub_order[-c(length(msub_order), length(msub_order) -1)], 
                             "Astro", "Oligo", "OPC", "Micro_PVM", "Endo", "VLMC"),ordered = T)

Filter WM - not well represented in Scz and has variable size - skew in bulk analysis. Make data frames

coh <- setNames(names(stereo.med),names(stereo.med))

library(data.table)
coh2 <- setNames(c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52"),
  c("MB6", "MB9", "MB19", "MB21", "MB50", "MB52"))


c(MB6="SCZ", MB50="SCZ", MB52="SCZ", 
                          MB9="CNT", MB19="CNT", MB21="CNT")

samplegroups2 <- list(
  control = c('MB7', 'MB9', 'MB11', 'MB13', 'MB15', 'MB16', 'MB17', 'MB19', 'MB20', 'MB21', 'MB18-2'),
  schizo = c('MB6', 'MB10', 'MB12', 'MB14', 'MB22', 'MB23','MB8-2', 'MB50', 'MB52')
)

diseasef <- as.factor(setNames(rep(names(samplegroups2),unlist(lapply(samplegroups2,length))),unlist(samplegroups2)))

library(Matrix)
#MED
predict.stereo.med <- 
  lapply(coh, function(x) {
  t(stereo.med[[x]])[, names(layers.filter.list2[[x]][!layers.filter.list2[[x]] == "WM"])] %>% 
   apply(1, mean)
  }
) %>% as.data.frame %>% data.frame(subtypes = factor(rownames(.), levels = msub_order_allen_10x), .)
predict.stereo.med.melt <- melt(predict.stereo.med, id.vars = "subtypes", variable.name = "samples", 
                          value.name = "Mean probability")
#add condition
predict.stereo.med.melt$condition <- diseasef[predict.stereo.med.melt$samples %>% as.vector]
#HIGH
predict.stereo.high <- 
  lapply(coh, function(x) {
  t(stereo.high[[x]])[, names(layers.filter.list2[[x]][!layers.filter.list2[[x]] == "WM"])] %>% 
   apply(1, mean)
  }
) %>% as.data.frame %>% data.frame(subtypes = factor(rownames(.), levels = hsub_order_allen_10x), .)
predict.stereo.high.melt <- melt(predict.stereo.high, id.vars = "subtypes", variable.name = "samples", 
                          value.name = "Mean probability")
#add condition
predict.stereo.high.melt$condition <- diseasef[predict.stereo.high.melt$samples %>% as.vector]

LAYERS stereo data frames

#MED LAYERS
stereo.med.layers1 <- lapply(setNames(c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7"), 
                                c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7")), function(x) {
                   y <- read.delim(paste0("/home/viktor_petukhov/Data/Schizophrenia/deconv_v2/stereoscope_res/",
                                                    x, "_cm.tsv"), row.names = 1) %>% t
                   #correct annotations                 
                   rownames(y) <- rownames(y) %>% gsub("\\.", "_", x = .)                       
                   #filter low quality spots
                   y[, colnames(y) %in% names(layers.filter.list0[[x]])] %>%
                    data.frame(subtypes = factor(rownames(.), levels = msub_order_allen_10x), .) %>%
                    melt(id.vars = "subtypes", variable.name = "barcodes", value.name = "probability") %>%
                    cbind(., sample = rep(x, dim(.)[1])) %>% cbind(., condition = diseasef[rep(x, dim(.)[1])]) %>%
                    cbind(., layer = `[`(layers.filter.list2[[x]], .[["barcodes"]])) %>%
                     group_by(., subtypes, sample, condition, layer) %>% 
                     summarize_at(., "probability", list(mean_probability = mean, spot_number = length))})


stereo.med.layers2 <- lapply(coh2 , function(x) {
                       y <- read.delim(paste0("/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/deconv2/",
                                                    x, ".tsv"), row.names = 1) %>% t
                       #correct annotations   
                       rownames(y) <- rownames(y) %>% gsub("\\.", "_", x = .)   
                       #filter low quality spots
                       y[, colnames(y) %in% names(layers.filter.list1[[x]])] %>%
                        data.frame(subtypes = factor(rownames(.), levels = msub_order_allen_10x), .) %>%
                        melt(id.vars = "subtypes", variable.name = "barcodes", value.name = "probability") %>%
                        cbind(., sample = rep(x, dim(.)[1])) %>% cbind(., condition = diseasef[rep(x, dim(.)[1])]) %>%
                        cbind(., layer = `[`(layers.filter.list2[[x]], .[["barcodes"]])) %>%
                         group_by(., subtypes, sample, condition, layer) %>% 
                         summarize_at(., "probability", list(mean_probability = mean, spot_number = length))})

stereo.med.layers<- c(stereo.med.layers1, stereo.med.layers2)
stereo.med.layers.comb <- bind_rows(stereo.med.layers)

#Filter out areas/layers with less than 5 spots
stereo.med.layers.comb.filt <- stereo.med.layers.comb[stereo.med.layers.comb$spot_number > 5, ]

#Filter out WM
stereo.med.layers.comb.filt <- stereo.med.layers.comb.filt[
  !stereo.med.layers.comb.filt$layer == "WM",]
#HIGH LAYERS
stereo.high.layers1 <- lapply(setNames(c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7"), 
                                c("MB11","MB12", "MB14", "MB15", "MB18-2", "MB22", "MB23", "MB7")), function(x) {
                   y <- read.delim(paste0("/d0/home/viktor_petukhov/Data/Schizophrenia/deconv_v2/stereoscope_res_high/",
                                                    x, "_cm.tsv"), row.names = 1) %>% t
                   #correct annotations    
                   rownames(y) <- rownames(y) %>% gsub("\\.", "_", x = .)    
                   #filter low quality spots
                   y[, colnames(y) %in% names(layers.filter.list0[[x]])] %>%
                    data.frame(subtypes = factor(rownames(.), levels = hsub_order_allen_10x), .) %>%
                    melt(id.vars = "subtypes", variable.name = "barcodes", value.name = "probability") %>%
                    cbind(., sample = rep(x, dim(.)[1])) %>% 
                     cbind(., condition = diseasef[rep(x, dim(.)[1])]) %>%
                     cbind(., layer = `[`(layers.filter.list0[[x]], .[["barcodes"]])) %>%
                     group_by(., subtypes, sample, layer, condition) %>% 
                     summarize_at(., "probability", list(mean_probability = mean, spot_number = length))})

stereo.high.layers2 <- lapply(coh2, function(x) {
                   y <- read.delim(paste0("/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/deconv/",
                                                    x, ".tsv"), row.names = 1) %>% t
                   #correct annotations      
                   rownames(y) <- rownames(y) %>% gsub("\\.", "_", x = .)                 
                   #filter low quality spots
                   y[, colnames(y) %in% names(layers.filter.list1[[x]])] %>%
                      data.frame(subtypes = factor(rownames(.), levels = hsub_order_allen_10x), .) %>%
                      melt(id.vars = "subtypes", variable.name = "barcodes", value.name = "probability") %>%
                      cbind(., sample = rep(x, dim(.)[1])) %>% cbind(., condition = diseasef[rep(x, dim(.)[1])]) %>%
                      cbind(., layer = `[`(layers.filter.list2[[x]], .[["barcodes"]])) %>%
                     group_by(., subtypes, sample, layer, condition) %>% 
                     summarize_at(., "probability", list(mean_probability = mean, spot_number = length))})

stereo.high.layers <- c(stereo.high.layers1,stereo.high.layers2)
stereo.high.layers.comb <- bind_rows(stereo.high.layers)

#Filter out areas/layers with less than 5 spots
stereo.high.layers.comb.filt <- stereo.high.layers.comb[stereo.high.layers.comb$spot_number > 5, ]

#Filter out WM
stereo.high.layers.comb.filt <- stereo.high.layers.comb.filt[
  !stereo.high.layers.comb.filt$layer == "WM",]

helper funcs

null.lenth.signif <- function(x, y) {
  if (signif(x)[pastes(y)][x[pastes(y)] <= 0.05 & !is.na(x[pastes(y)])] %>% length > 0) {
    signif(x)[pastes(y)][x[pastes(y)] <= 0.05 & !is.na(x[pastes(y)])]
} else {
  NULL}}

pastes <- function(x) {
  c(paste0(x, ".L1"), paste0(x, ".L2"), paste0(x, ".L3"), paste0(x, ".L4"), paste0(x, ".L5"),
     paste0(x, ".L6")
    )}

null.length <- function(vect, x, y) {
  if (x[pastes(y)][x[pastes(y)] <= 0.05 & !is.na(x[pastes(y)])] %>% length > 0) {
  vect[x[pastes(y)] <= 0.05 & !is.na(x[pastes(y)])]
} else {
  NULL}}
library(ggplot2)
library(ggsignif)
palette_45_2 <- readRDS("/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/ctx_layers_SCZ_ALLEN/rds_objects/palette_45_2.rds")
palette_45 <- readRDS("/home/mbatiuk/projects/human_schizo/MB6-MB23/2ndary/ctx_layers_SCZ_ALLEN/rds_objects/palette_45.rds")

list.stereo.high.plot <-
    lapply(setNames(hsub_order_allen_10x, hsub_order_allen_10x), function(subt) {
    ggplot(data = 
             stereo.high.layers.comb.filt[stereo.high.layers.comb.filt[["subtypes"]] == as.character(subt), ], 
            aes(x = layer, y = mean_probability, dodge = condition, fill = condition)) +
      geom_point(aes(y = mean_probability, color = condition), 
                 position = position_jitterdodge(dodge.width = 0.7, jitter.width = 0.5), 
                   size = 1.1, alpha = 0.3, show.legend = F) +
      geom_boxplot(notch = F, outlier.shape = NA, alpha = 0.3, show.legend = F, lwd = 0.4) +
      geom_signif(annotations = null.lenth.signif(wilcox.stereo.layer.high.vec.adj, subt),
                 xmin = null.length(c(0.75:5.75), wilcox.stereo.layer.high.vec.adj, subt), 
                 xmax = null.length(c(1.25:6.25), wilcox.stereo.layer.high.vec.adj, subt), 
                 y_position = null.length(c(rep(max(stereo.high.layers.comb.filt[stereo.high.layers.comb.filt[["subtypes"]] == 
                                                as.character(subt), ]$mean_probability)*1.1, 6)), wilcox.stereo.layer.high.vec.adj, subt), textsize = 7, vjust = 0.5) +
        theme_bw() +
        theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5, size = 11),
              axis.title.x = element_blank(),
              axis.text.y = element_text(size = 11),
              axis.title.y = element_text(size = 11),
              legend.title = element_blank(),
              legend.text = element_text(size = 11),
              plot.subtitle = element_text(size = 11, hjust = 0.5, face = "bold"),
              legend.position = "bottom",
              text = element_text(),
              plot.margin = unit(c(2.5, 13, 2.5, 1), "mm")) +
              labs(subtitle = subt, x = NULL, y = "Mean fraction") +
        scale_color_manual(values = palette_45_2[c(8, 15)]) +
        scale_fill_manual(values = palette_45_2[c(8, 15)]) +
        ylim(min = -0.001, max = max(
          stereo.high.layers.comb.filt[stereo.high.layers.comb.filt[["subtypes"]] == 
                                                as.character(subt), ]$mean_probability)*1.25)})
figS.stereo.high.plot <- cowplot::plot_grid(plotlist = list.stereo.high.plot, ncol = 5)

4. DE and GO analysis

#Read in Visium matrices
samples1 <-  read_rds("/d0-mendel/home/viktor_petukhov/Copenhagen/Schizophrenia20/cache/spatial_samples.rds")
samples2 <- readRDS("/d0/home/kdragicevic/RProjects/schizo/spatial/spatial_samples2.rds")

deconv_path1 <- "/home/viktor_petukhov/Data/Schizophrenia/deconv_v2/stereoscope_res/"
for(n in names(samples1)) {
  samples1[[n]]$deconv_probs <- read.table(paste0(deconv_path1, n, "_cm.tsv"))
  cm <- samples1[[n]]$cm
  samples1[[n]]$mit_frac <- Matrix::colSums(cm[grep("^MT-", rownames(cm)),]) / Matrix::colSums(cm)
}
deconv_path2 <- "/d0/home/kdragicevic/RProjects/schizo/spatial/prepdata/deconv2/"
for(n in names(samples2)) {
  samples2[[n]]$deconv_probs <- read.table(paste0(deconv_path2, n, ".tsv"))
  cm <- samples2[[n]]$cm
  samples2[[n]]$mit_frac <- Matrix::colSums(cm[grep("^MT-", rownames(cm)),]) / Matrix::colSums(cm)
}

samples <- c(samples1, samples2)
annot_spat <- lapply(samples, `[[`, "annotation") %>% Reduce(c, .)
#Create conos and cacoa object for DE
con_spat <- lapply(samples, `[[`, "p2") %>% Conos$new()
cao <- Cacoa$new(con_spat, 
                 ref.level="Ctr", 
                 target.level="Scz", 
                 sample.groups=setNames(c(rep("Ctr", 7),rep("Scz", 7)),unlist(samp_per_cond_spat, use.names = FALSE)), 
                 cell.groups=(annot_spat), 
                 n.cores=5)
de_spat_annot <- cao$estimatePerCellTypeDE(n.cores = 20)
#GO
library(org.Hs.eg.db)
org <- org.Hs.eg.db

library(pbapply)
go_datas <- c("BP", "CC", "MF") %>% setNames(., .) %>%
  pblapply(function(n) clusterProfiler:::get_GO_data(org.Hs.eg.db::org.Hs.eg.db, n, "ENTREZID") %>%
           as.list() %>% as.environment())

gos_spat <- list(annot = calculateGos(de_spat_annot, go_datas, n.top.genes=500))
gos_spat_dfs <- lapply(gos_spat, lapply, extractAllGODf)
gos_spat_clust <- lapply(gos_spat, lapply, clusterGos, n.clusters=20, max.pval=0.05)
cols <- list(up=circlize::colorRamp2(c(0, 1.3), c("grey98", "red")),
             down=circlize::colorRamp2(c(0, 1.3), c("grey98", "blue")))

#UP GO

plotPValueHeatmap(gos_spat_clust$annot$up$summary, cols$up)

pal <- setNames(RColorBrewer::brewer.pal(6, name = "Set2"), paste0("L", seq(1,6)))
upgo <- split.data.frame(gos_spat_dfs$annot$up, gos_spat_dfs$annot$up$Type)
upgo <- lapply(upgo, head,10)
upgo <- data.table::rbindlist(upgo)
upgo <- upgo[order(-p.adjust),]
upgo$Description %<>% make.unique()
upgo$Description %<>% factor(., levels=.)
upgo$p.adjust %<>% log10() %>% {. * -1}

ggplot(upgo, aes(p.adjust, Description, fill=Type)) + 
  geom_bar(stat = "identity", position = "dodge") +
  theme_bw() +
  labs(x="-log10(adj. P)", y="", fill="Layers - spatial data", title="Top 10 GO terms for up-regulated DE genes (spatial)") + 
  geom_vline(xintercept = 1.3, linetype="dotted") + scale_fill_manual(values = pal[upgo$Type %>% rev %>% unique]) +
    scale_y_discrete(labels=setNames(sapply(strsplit(as.character(upgo$Description), ".", fixed= TRUE), "[[", 1),upgo$Description))

#DOWN GO

plotPValueHeatmap(gos_spat_clust$annot$down$summary, cols$down)

downgo <- split.data.frame(gos_spat_dfs$annot$down, gos_spat_dfs$annot$down$Type)
downgo <- lapply(downgo, head,10)
downgo <- data.table::rbindlist(downgo)
downgo <- downgo[order(-p.adjust),]
downgo$Description %<>% make.unique()
downgo$Description %<>% factor(., levels=.)
downgo$p.adjust %<>% log10() %>% {. * -1}

ggplot(downgo, aes(p.adjust, Description, fill=Type)) + 
  geom_bar(stat = "identity", position = "dodge") +
  theme_bw() +
  labs(x="-log10(adj. P)", y="", fill="Layers - spatial data", title="Top 10 GO terms for down-regulated DE genes (spatial)") + 
  geom_vline(xintercept = 1.3, linetype="dotted") + scale_fill_manual(values = pal[downgo$Type %>% rev %>% unique]) +
    scale_y_discrete(labels=setNames(sapply(strsplit(as.character(downgo$Description), ".", fixed= TRUE), "[[", 1),downgo$Description))

snRNAseq and Visium overlap

gos_sc <- goslW
go_df <- names(gos_sc$high$up) %>% setNames(., .) %>% 
  lapply(function(n) as_tibble(gos_sc$high$up[[n]]@result) %>% mutate(CellType=n)) %>% 
  plyr::rbind.fill()

gos_sc_filt <- gos_sc %>% lapply(lapply, function(x) x) %>% .["high"] %>% .[[1]]
gos_sc_aggr <- gos_sc_filt %>% names() %>% lapply(function(dir) {
  gos_sc_filt[[dir]] %>% names() %>%
    lapply(function(type) {
      gos_sc_filt[[dir]][[type]]@result %>% 
        filter(pvalue < 0.05) %>% 
        mutate(Type=type, Direction=dir) %>% 
        {if(nrow(.) < 4) . else .[1:3,]}
    })
}) %>% 
  do.call(c, .) %>% .[sapply(., nrow) > 0] %>% do.call(rbind, .) %>%
  .[order(.$p.adjust, decreasing=T),] %>% split(.$Direction) %>% lapply(function(df) {
  df$Description %<>% make.unique() %>% factor(., levels=.)
  df$p.adjust %<>% log10() %>% {. * -1}
  df
})

gos_spat_dfs_filt <- lapply(gos_spat_dfs, function(dfs) {
  names(dfs) %>% setNames(., .) %>% 
    lapply(function(n) {
      filter(dfs[[n]], Description %in% gos_sc_aggr[[n]]$Description) %>% 
        mutate(p.adjust=p.adjust(pvalue, method="BH"))
    })
})
plotPValueDfHeatmap(gos_spat_dfs_filt$annot$up, cols$up, p.threshold = 0.05) + ggtitle("Upregulated GO terms; Visium:snRNAseq overlap")

upgo <- split.data.frame(gos_spat_dfs_filt$annot$up, gos_spat_dfs_filt$annot$up$Type)

upgo <- lapply(upgo, head,3)
upgo <- data.table::rbindlist(upgo)
upgo <- upgo[order(-p.adjust),]
upgo$Description %<>% make.unique()
upgo$Description %<>% factor(., levels=.)
upgo$p.adjust %<>% log10() %>% {. * -1}
library(ggplot2)
upgobar <- ggplot(upgo, aes(p.adjust, Description, fill=Type)) + 
  geom_bar(stat = "identity", position = "dodge") + 
  theme_bw()+
  theme(plot.title.position = "plot")+
  labs(x="-log10(adj. P)", y="", fill="Layers - spatial data", title="Top 3 upregulated GO terms; Visium:snRNA-seq overlap") + 
  geom_vline(xintercept = 1.3, linetype="dotted") + scale_fill_manual(values = pal[upgo$Type %>% rev %>% unique]) +
    scale_y_discrete(labels=setNames(sapply(strsplit(as.character(upgo$Description), ".", fixed= TRUE), "[[", 1),upgo$Description))

downgo <- split.data.frame(gos_spat_dfs_filt$annot$down, gos_spat_dfs_filt$annot$down$Type)

downgo <- lapply(downgo, head,3)
downgo <- data.table::rbindlist(downgo)
downgo <- downgo[order(-p.adjust),]
downgo$Description %<>% make.unique()
downgo$Description %<>% factor(., levels=.)
downgo$p.adjust %<>% log10() %>% {. * -1}

downgobar <- ggplot(downgo, aes(p.adjust, Description, fill=Type)) + 
  geom_bar(stat = "identity", position = "dodge") + 
  theme_bw()+
  theme(plot.title.position = "plot")+
  labs(x="-log10(adj. P)", y="", fill="Layers - spatial data", title="Top 3 downregulated GO terms; Visium:snRNA-seq overlap") +  
  geom_vline(xintercept = 1.3, linetype="dotted") + scale_fill_manual(values = pal[downgo$Type %>% rev %>% unique]) + 
    scale_y_discrete(labels=setNames(sapply(strsplit(as.character(downgo$Description), ".", fixed= TRUE), "[[", 1),downgo$Description))